training module: shl_tm
prediction module: shl_pm
simulation module: shl_sm
misc module: shl_mm
historical bidding price, per second, time series
live bidding price, per second, time series
parm_si (seasonality index per second)
parm_month (parameters such as alpha, beta, gamma, etc., one set per month)
In [27]:
%matplotlib inline
import matplotlib.pyplot as plt
In [1]:
import pandas as pd
In [2]:
# Load the historical time series from data/history_ts.csv and preview its last rows.
# The simulation loop later reads the 'ccyy-mm', 'time' and 'bid-price' columns of this frame.
df_history_ts_process = pd.read_csv('data/history_ts.csv')
df_history_ts_process.tail()
Out[2]:
In [3]:
# Load data/history_table.csv and preview its last rows.
# NOTE(review): this frame is not referenced again in the visible part of the notebook.
df_history_table_process = pd.read_csv('data/history_table.csv')
df_history_table_process.tail()
Out[3]:
In [6]:
# Load the seasonality-index table; lookups filter on its 'ccyy-mm' and 'time' columns
# and read the 'si' column.
df_parm_si = pd.read_csv('data/parm_si.csv')
# Example lookup (debug):
# print(df_parm_si[(df_parm_si['ccyy-mm'] == '2017-07') & (df_parm_si['time'] == '11:29:00')].iloc[0]['si'])
df_parm_si.tail()
Out[6]:
In [7]:
# Load the per-month parameter table; later cells filter on 'ccyy-mm' and read the 'di',
# 'alpha', 'beta', 'gamma', 'sec57-weight', 'month-weight' and 'short-weight' columns.
df_parm_month = pd.read_csv('data/parm_month.csv')
# Example lookup (debug):
# print(df_parm_month[df_parm_month['ccyy-mm'] == '2017-07'].iloc[0]['di'])
df_parm_month.tail()
Out[7]:
In [8]:
# function to fetch Seasonality-Index
def fetech_si(ccyy_mm, time, df_parm_si):
    """Return the seasonality index for one month and one second of the day.

    Args:
        ccyy_mm: Month key compared against the 'ccyy-mm' column, e.g. '2017-07'.
        time: Time key compared against the 'time' column, e.g. '11:29:00'.
        df_parm_si: Seasonality table with 'ccyy-mm', 'time' and 'si' columns.

    Returns:
        The 'si' value of the first row matching both keys.

    Raises:
        IndexError: If no row matches the given month and time.
    """
    is_match = (df_parm_si['ccyy-mm'] == ccyy_mm) & (df_parm_si['time'] == time)
    matches = df_parm_si[is_match]
    if matches.empty:
        # Same exception type a bare .iloc[0] would raise, but the message
        # names the key that was not found instead of a positional-index error.
        raise IndexError(
            'no seasonality index for ccyy-mm=%r, time=%r' % (ccyy_mm, time))
    return matches.iloc[0]['si']
In [9]:
# function to fetch Dynamic-Increment
def fetech_di(ccyy_mm, df_parm_month):
    """Return the dynamic increment configured for one month.

    Args:
        ccyy_mm: Month key compared against the 'ccyy-mm' column, e.g. '2017-07'.
        df_parm_month: Monthly parameter table with 'ccyy-mm' and 'di' columns.

    Returns:
        The 'di' value of the first row matching the month.

    Raises:
        IndexError: If no row matches the given month.
    """
    matches = df_parm_month[df_parm_month['ccyy-mm'] == ccyy_mm]
    if matches.empty:
        # Same exception type a bare .iloc[0] would raise, but the message
        # names the month that was not found.
        raise IndexError('no monthly parameters for ccyy-mm=%r' % (ccyy_mm,))
    return matches.iloc[0]['di']
In [62]:
def get_previous_n_sec_time_as_str(df_time_field, n):
    """Return the clock time n seconds before df_time_field, as a string.

    df_time_field is parsed with the '%H:%M:%S' format; the result is the
    string form of the shifted time-of-day.
    Example: get_previous_n_sec_time_as_str('11:29:57', 3) gives '11:29:54'.
    """
    parsed = pd.to_datetime(df_time_field, format='%H:%M:%S')
    shifted = parsed - pd.Timedelta(seconds=n)
    return str(shifted.time())
def get_future_n_sec_time_as_str(df_time_field, n):
    """Return the clock time n seconds after df_time_field, as a string.

    df_time_field is parsed with the '%H:%M:%S' format; the result is the
    string form of the shifted time-of-day.
    Example: get_future_n_sec_time_as_str('11:29:57', 3) gives '11:30:00'.
    """
    parsed = pd.to_datetime(df_time_field, format='%H:%M:%S')
    shifted = parsed + pd.Timedelta(seconds=n)
    return str(shifted.time())
In [ ]:
In [42]:
# which month to predict?
global_parm_ccyy_mm = '2017-07'

# Placeholder base price; the simulation loop replaces it with
# (bid price at 11:29:00) - 1 once that record is seen.
global_parm_base_price = 10000000

global_parm_dynamic_increment = fetech_di(global_parm_ccyy_mm, df_parm_month)

# All remaining monthly parameters sit on the same row of df_parm_month,
# so select that row once instead of re-filtering the table per parameter.
global_parm_month_row = df_parm_month[df_parm_month['ccyy-mm'] == global_parm_ccyy_mm].iloc[0]
global_parm_alpha = global_parm_month_row['alpha']
global_parm_beta = global_parm_month_row['beta']
global_parm_gamma = global_parm_month_row['gamma']
global_parm_sec57_weight = global_parm_month_row['sec57-weight']
global_parm_month_weight = global_parm_month_row['month-weight']
global_parm_short_weight = global_parm_month_row['short-weight']

print('=================================================')
print(' Global Parameters for Month : %s' % global_parm_ccyy_mm)
print('-------------------------------------------------')
print('global_parm_dynamic_increment : %d' % global_parm_dynamic_increment)
print('global_parm_alpha : %0.15f' % global_parm_alpha) # used in forecasting
print('global_parm_beta : %0.15f' % global_parm_beta) # used in forecasting
print('global_parm_gamma : %0.15f' % global_parm_gamma) # used in forecasting
print('global_parm_sec57_weight : %f' % global_parm_sec57_weight) # used in training a model
print('global_parm_month_weight : %f' % global_parm_month_weight) # used in training a model
print('global_parm_short_weight : %f' % global_parm_short_weight) # used in training a model
print('=================================================')

# Plot the seasonality index of the month selected above (previously the
# month was hard-coded here and could drift from global_parm_ccyy_mm).
plt.figure(figsize=(6,3))
plt.plot(df_parm_si[df_parm_si['ccyy-mm'] == global_parm_ccyy_mm]['si'])
Out[42]:
In [ ]:
In [154]:
# 11:29:00~11:29:50
# Replay 51 consecutive per-second records as if they arrived live. For each
# record: subtract the base price, divide by the seasonality index, update the
# level/trend smoothing state (Holt-style linear exponential smoothing with
# global_parm_alpha / global_parm_beta), and store the resulting forecast as
# one row of df_shl_pm.
# NOTE(review): the original comment says these rows are July 2015 data while
# global_parm_ccyy_mm is '2017-07' - confirm which month rows 1830..1880 hold.
for i in range(1830, 1830+51):
    print('\n<<<< Record No.: %5d >>>>' % i)
    record_ccyy_mm = df_history_ts_process['ccyy-mm'][i] # format: ccyy-mm
    print(record_ccyy_mm)
    record_time = df_history_ts_process['time'][i] # format: hh:mm:ss
    print(record_time)
    record_bid_price = df_history_ts_process['bid-price'][i] # format: integer
    print(record_bid_price)
    is_first_record = record_time == '11:29:00'

    # capture & calculate 11:29:00 bid price - 1 = base price
    # NOTE(review): indentation was lost in the notebook export; the print is
    # assumed to belong to the 11:29:00 branch - confirm against the notebook.
    if is_first_record:
        global_parm_base_price = record_bid_price - 1
        print('#### global_parm_base_price : %d ####' % global_parm_base_price)

    print('---- Pre-Process ---')
    # pre-process: ccyy-mm hh:mm:ss label and price relative to the base price
    current_datetime = record_ccyy_mm + ' ' + record_time
    current_price4pm = record_bid_price - global_parm_base_price
    print('#### current_datetime : %s ####' % current_datetime)
    print('#### current_price4pm : %d ####' % current_price4pm)

    # get Seasonality-Index
    current_si = fetech_si(record_ccyy_mm, record_time, df_parm_si)
    print('#### current_si : %0.10f ####' % current_si)

    # get de-seasoned price: price4pmsi
    current_price4pmsi = current_price4pm / current_si
    print('#### current_price4pmsi : %0.10f ####' % current_price4pmsi)

    if is_first_record:
        # initialize prediction dataframe and smoothing state at 11:29:00
        df_shl_pm = pd.DataFrame()
        print('---- call predicitno function shl_pm ---- %s' % record_time)
        current_pred_les_level = current_price4pmsi
        current_pred_les_trend = 0
    else:
        # update level and trend from the row stored for the previous second
        print('---- call predicitno function shl_pm ---- %s' % record_time)
        previous_time = get_previous_n_sec_time_as_str(record_time, 1)
        previous_row = df_shl_pm[(df_shl_pm['ccyy-mm'] == record_ccyy_mm)
                                 & (df_shl_pm['time'] == previous_time)].iloc[0]
        previous_pred_les_level = previous_row['pred_les_level']
        print(' previous_pred_les_level : %f' % previous_pred_les_level)
        previous_pred_les_trend = previous_row['pred_les_trend']
        print(' previous_pred_les_trend : %f' % previous_pred_les_trend)

        current_pred_les_level = global_parm_alpha * current_price4pmsi \
            + (1 - global_parm_alpha) * (previous_pred_les_level + previous_pred_les_trend)
        print(' current_pred_les_level : %f' % current_pred_les_level)
        current_pred_les_trend = global_parm_beta * (current_pred_les_level - previous_pred_les_level) \
            + (1 - global_parm_beta) * previous_pred_les_trend
        print(' current_pred_les_trend : %f' % current_pred_les_trend)

    # The forecast itself is identical for both branches: level + trend,
    # re-seasonalised and shifted back by the base price.
    current_pred_les = current_pred_les_level + current_pred_les_trend
    current_pred_les_misc = 0
    current_pred_price = (current_pred_les + current_pred_les_misc) * current_si \
        + global_parm_base_price
    current_pred_price_rounded = round(current_pred_price/100, 0) * 100
    current_pred_dynamic_increment = global_parm_dynamic_increment
    current_pred_set_price_rounded = current_pred_price_rounded + current_pred_dynamic_increment

    # write results to shl_pm dataframe
    df_shl_pm_current = {
        'ccyy-mm' : record_ccyy_mm,
        'time' : record_time,
        'bid' : record_bid_price,
        'datetime' : current_datetime,
        'price4pm' : current_price4pm,
        'si' : current_si,
        'price4pmsi' : current_price4pmsi,
        'pred_les_level' : current_pred_les_level,
        'pred_les_trend' : current_pred_les_trend,
        'current_pred_les' : current_pred_les,
        'pred_les_misc' : current_pred_les_misc,
        'pred_price' : current_pred_price,
        'pred_price_rounded' : current_pred_price_rounded,
        'pred_dynamic_increment' : current_pred_dynamic_increment, # +200 or + 300
        'pred_set_price_rounded' : current_pred_set_price_rounded,
    }
    # DataFrame.append was removed in pandas 2.0; pd.concat works on old and
    # new versions. The first row replaces the empty frame directly so that no
    # empty frame takes part in the concatenation.
    df_shl_pm_current_row = pd.DataFrame([df_shl_pm_current])
    if df_shl_pm.empty:
        df_shl_pm = df_shl_pm_current_row
    else:
        df_shl_pm = pd.concat([df_shl_pm, df_shl_pm_current_row], ignore_index=True)
In [155]:
# Preview the first rows of the prediction table built by the simulation loop.
df_shl_pm.head()
Out[155]:
In [157]:
# Preview the last rows of the same table.
df_shl_pm.tail()
Out[157]:
In [124]:
# Plot the recorded bids against the model's predicted prices for the simulated window.
plt.figure(figsize=(12,6))
plt.plot(df_shl_pm['bid'])
# Alternative (unshifted prediction):
# plt.plot(df_shl_pm['pred_price'])
# shift(1) moves every prediction down one row, so the value computed at row t
# is drawn at row t+1, next to the bid of the following second.
plt.plot(df_shl_pm['pred_price'].shift(1))
# Alternative (shifted one row up instead):
# plt.plot(df_shl_pm['pred_price'].shift(-1))
Out[124]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [166]:
# 11:29:51~
def predict_k_step_price(df_shl_pm, ccyy_mm, time, k):
    """Extend the prediction table by k one-second steps fed by its own forecasts.

    For each second starting at ``time``, the 'pred_price' stored for the
    previous second is used in place of an observed bid: it is reduced by the
    base price, divided by the seasonality index, pushed through the same
    level/trend update as the simulation loop, and the result is added to the
    table as a new row.

    Reads the module-level names global_parm_base_price, global_parm_alpha,
    global_parm_beta, global_parm_dynamic_increment and df_parm_si.

    Args:
        df_shl_pm: Prediction table that already holds a row for the second
            before ``time`` in month ``ccyy_mm``. It is not modified.
        ccyy_mm: Month key ('ccyy-mm') of the rows to read and to write.
        time: First second to predict, as 'HH:MM:SS'.
        k: Number of consecutive seconds to predict.

    Returns:
        A new DataFrame: the rows of df_shl_pm followed by the k predicted rows.

    Raises:
        IndexError: If the table has no row for the previous second, or no
            seasonality index exists for a predicted second.
    """
    print('month & time : ', ccyy_mm, time)
    print()
    for sec in range(0, k):
        print('delta second(s) : ', sec)
        current_time = get_future_n_sec_time_as_str(time, sec)
        print('current_time : %s' % current_time)
        previous_time = get_previous_n_sec_time_as_str(current_time, 1)
        print('previous_time : %s' % previous_time)

        # One lookup of the previous second's row; rows are selected with the
        # ccyy_mm argument (previously the global month was used and the
        # argument was ignored).
        previous_row = df_shl_pm[(df_shl_pm['ccyy-mm'] == ccyy_mm)
                                 & (df_shl_pm['time'] == previous_time)].iloc[0]
        previous_pred_les_level = previous_row['pred_les_level']
        print(' previous_pred_les_level : %f' % previous_pred_les_level)
        previous_pred_les_trend = previous_row['pred_les_trend']
        print(' previous_pred_les_trend : %f' % previous_pred_les_trend)

        print('---- Pre-Process ---')
        # use the previously predicted price in place of an observed bid
        previous_pred_price = previous_row['pred_price']
        # pre-process: ccyy-mm hh:mm:ss label and price relative to the base price
        current_datetime = ccyy_mm + ' ' + current_time
        current_price4pm = previous_pred_price - global_parm_base_price
        print('#### current_datetime : %s ####' % current_datetime)
        print('#### previous_pred_price: %s ####' % previous_pred_price)
        print('#### current_price4pm : %d ####' % current_price4pm)

        # get Seasonality-Index
        current_si = fetech_si(ccyy_mm, current_time, df_parm_si)
        print('#### current_si : %0.10f ####' % current_si)

        # get de-seasoned price: price4pmsi
        current_price4pmsi = current_price4pm / current_si
        print('#### current_price4pmsi : %0.10f ####' % current_price4pmsi)

        current_pred_les_level = global_parm_alpha * current_price4pmsi \
            + (1 - global_parm_alpha) * (previous_pred_les_level + previous_pred_les_trend)
        print(' current_pred_les_level : %f' % current_pred_les_level)
        current_pred_les_trend = global_parm_beta * (current_pred_les_level - previous_pred_les_level) \
            + (1 - global_parm_beta) * previous_pred_les_trend
        print(' current_pred_les_trend : %f' % current_pred_les_trend)

        current_pred_les = current_pred_les_level + current_pred_les_trend
        current_pred_les_misc = 0
        current_pred_price = (current_pred_les + current_pred_les_misc) * current_si \
            + global_parm_base_price
        current_pred_price_rounded = round(current_pred_price/100, 0) * 100
        current_pred_dynamic_increment = global_parm_dynamic_increment
        current_pred_set_price_rounded = current_pred_price_rounded + current_pred_dynamic_increment

        # write results to shl_pm dataframe
        df_shl_pm_current = {
            'ccyy-mm' : ccyy_mm,
            'time' : current_time,
            'bid' : previous_pred_price,
            'datetime' : current_datetime,
            'price4pm' : current_price4pm,
            'si' : current_si,
            'price4pmsi' : current_price4pmsi,
            'pred_les_level' : current_pred_les_level,
            'pred_les_trend' : current_pred_les_trend,
            'current_pred_les' : current_pred_les,
            'pred_les_misc' : current_pred_les_misc,
            'pred_price' : current_pred_price,
            'pred_price_rounded' : current_pred_price_rounded,
            'pred_dynamic_increment' : current_pred_dynamic_increment, # +200 or + 300
            'pred_set_price_rounded' : current_pred_set_price_rounded,
        }
        print('---------------------------')
        # DataFrame.append was removed in pandas 2.0; pd.concat also returns a
        # new frame, so the caller's table is still left untouched.
        df_shl_pm = pd.concat([df_shl_pm, pd.DataFrame([df_shl_pm_current])],
                              ignore_index=True)
    return df_shl_pm
In [178]:
# Predict 10 seconds starting at 11:29:51 on top of the simulated table; the
# result is stored under a new name.
df_shl_pm_new = predict_k_step_price(df_shl_pm, global_parm_ccyy_mm, '11:29:51', 10)
In [181]:
# Show the last 11 rows of the extended table.
df_shl_pm_new.tail(11)
Out[181]:
In [182]:
# Show the tail of the original table for comparison.
df_shl_pm.tail()
Out[182]:
In [186]:
# Plot the extended table: the 'bid' column against the predicted price.
# In the rows added by predict_k_step_price, 'bid' holds the previous second's
# predicted price rather than an observed bid.
plt.figure(figsize=(12,6))
plt.plot(df_shl_pm_new['bid'])
# shift(1) draws the prediction computed at row t at row t+1.
plt.plot(df_shl_pm_new['pred_price'].shift(1))
# Alternative (df_shl_pm_k is not defined in the visible code):
# plt.plot(df_shl_pm_k['pred_price'].shift(-1))
Out[186]:
In [187]:
# Re-plot the original simulated table (without the extra predicted seconds) for comparison.
plt.figure(figsize=(12,6))
plt.plot(df_shl_pm['bid'])
# Alternative (unshifted prediction):
# plt.plot(df_shl_pm['pred_price'])
# shift(1) draws the prediction computed at row t at row t+1.
plt.plot(df_shl_pm['pred_price'].shift(1))
# Alternative (shifted one row up instead):
# plt.plot(df_shl_pm['pred_price'].shift(-1))
Out[187]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
# from __future__ import print_function, division
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns; sns.set()
import pandas as pd
import operator
from scipy import interp
from itertools import cycle
from sklearn import svm
from sklearn.utils.validation import check_random_state
from sklearn.model_selection import StratifiedKFold, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import AdaBoostRegressor
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.ensemble import BaggingRegressor
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.ensemble import BaggingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import roc_curve, auc
from statsmodels.graphics.mosaicplot import mosaic
# Print whatever docstring is bound to __doc__ in the current namespace.
# NOTE(review): presumably boilerplate copied along with the import list above; nothing later depends on it.
print(__doc__)
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]: